deltas: Search for similar objects (possibly renamed across directories)
author Colin Walters <walters@verbum.org>
Wed, 11 Feb 2015 08:29:14 +0000 (03:29 -0500)
committer Colin Walters <walters@verbum.org>
Mon, 16 Feb 2015 15:10:35 +0000 (10:10 -0500)
The previous diff algorithm was file tree based, and only looked
at modified files that lived at the same path.

However, components like the Linux kernel have versioned
subdirectories, e.g. /usr/lib/modules/$kver/.../ext4.ko.  We want to
be able to detect these "modified renames" so that we can compute
diffs (rollsum, bsdiff).

Makefile-libostree.am
src/libostree/ostree-repo-static-delta-compilation-analysis.c [new file with mode: 0644]
src/libostree/ostree-repo-static-delta-compilation.c
src/libostree/ostree-repo-static-delta-private.h

index f77a36a786e59f6afb19fe72f4dd5e07d6741a9f..a8ba8bc9f3b316c151b69612c621e2769e982a42 100644 (file)
@@ -91,6 +91,7 @@ libostree_1_la_SOURCES = \
        src/libostree/ostree-repo-static-delta-core.c \
        src/libostree/ostree-repo-static-delta-processing.c \
        src/libostree/ostree-repo-static-delta-compilation.c \
+       src/libostree/ostree-repo-static-delta-compilation-analysis.c \
        src/libostree/ostree-repo-static-delta-private.h \
        $(NULL)
 if USE_LIBARCHIVE
diff --git a/src/libostree/ostree-repo-static-delta-compilation-analysis.c b/src/libostree/ostree-repo-static-delta-compilation-analysis.c
new file mode 100644 (file)
index 0000000..39c818f
--- /dev/null
@@ -0,0 +1,305 @@
+/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*-
+ *
+ * Copyright (C) 2015 Colin Walters <walters@verbum.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "config.h"
+
+#include <string.h>
+#include <gio/gunixoutputstream.h>
+
+#include "ostree-core-private.h"
+#include "ostree-repo-private.h"
+#include "ostree-lzma-compressor.h"
+#include "ostree-repo-static-delta-private.h"
+#include "ostree-diff.h"
+#include "ostree-rollsum.h"
+#include "otutil.h"
+#include "ostree-varint.h"
+
+/*
+ * _ostree_delta_content_sizenames_free:
+ * @v: an OstreeDeltaContentSizeNames, typed as gpointer so that this
+ *     function can be used directly as a destroy notify.
+ *
+ * Frees the checksum string, drops the reference held on the
+ * basenames array (if one was created), and frees the structure.
+ */
+void
+_ostree_delta_content_sizenames_free (gpointer v)
+{
+  OstreeDeltaContentSizeNames *ce = v;
+  g_free (ce->checksum);
+  /* The basenames array is created lazily when the first name is
+   * added, so an entry that was registered but never received a name
+   * (e.g. because loading its file info failed) still has NULL here;
+   * g_ptr_array_unref() must not be called with NULL.
+   */
+  if (ce->basenames)
+    g_ptr_array_unref (ce->basenames);
+  g_free (ce);
+}
+
+/*
+ * build_content_sizenames_recurse:
+ * @repo: repository used to load file info and directory trees
+ * @iter: traversal iterator positioned on a commit or directory tree
+ * @sizenames_map: map of checksum -> OstreeDeltaContentSizeNames that
+ *                 is filled in by this function
+ * @include_only_objects: if non-NULL, file entries whose checksum is
+ *                        not contained in this table are skipped
+ * @cancellable: passed through to the iterator and file loading calls
+ * @error: passed through to the calls that can fail
+ *
+ * Drains @iter.  Each file entry has its name appended to the
+ * basenames of the map entry for its checksum; the entry is created,
+ * and its size loaded from the repository, the first time a checksum
+ * is seen.  Each directory entry has its dirtree loaded and is walked
+ * by a recursive call with a fresh sub-iterator.
+ *
+ * Returns: TRUE once the iterator reports its end, FALSE as soon as
+ * any step fails.
+ */
+static gboolean
+build_content_sizenames_recurse (OstreeRepo                     *repo,
+                                 OstreeRepoCommitTraverseIter   *iter,
+                                 GHashTable                     *sizenames_map,
+                                 GHashTable                     *include_only_objects,
+                                 GCancellable                   *cancellable,
+                                 GError                        **error)
+{
+  gboolean ret = FALSE;
+
+  while (TRUE)
+    {
+      OstreeRepoCommitIterResult iterres =
+        ostree_repo_commit_traverse_iter_next (iter, cancellable, error);
+          
+      if (iterres == OSTREE_REPO_COMMIT_ITER_RESULT_ERROR)
+        goto out;
+      else if (iterres == OSTREE_REPO_COMMIT_ITER_RESULT_END)
+        break;
+      else if (iterres == OSTREE_REPO_COMMIT_ITER_RESULT_FILE)
+        {
+          /* NOTE(review): name/checksum look like they are borrowed
+           * from the iterator (both are copied with g_strdup before
+           * being stored below) - confirm against the iterator API.
+           */
+          char *name;
+          char *checksum;
+          OstreeDeltaContentSizeNames *csizenames;
+            
+          ostree_repo_commit_traverse_iter_get_file (iter, &name, &checksum);
+
+          if (include_only_objects && !g_hash_table_contains (include_only_objects, checksum))
+            continue;
+
+          /* One entry per distinct checksum; later sightings of the
+           * same content only contribute another basename.
+           */
+          csizenames = g_hash_table_lookup (sizenames_map, checksum);
+          if (!csizenames)
+            {
+              gs_unref_object GFileInfo *finfo = NULL;
+
+              csizenames = g_new0 (OstreeDeltaContentSizeNames, 1);
+              csizenames->checksum = g_strdup (checksum);
+              
+              /* Transfer ownership so things get cleaned up if we
+               * throw an exception below.
+               */
+              g_hash_table_replace (sizenames_map, csizenames->checksum, csizenames);
+
+              if (!ostree_repo_load_file (repo, checksum,
+                                          NULL, &finfo, NULL,
+                                          cancellable, error))
+                goto out;
+              
+              csizenames->size = g_file_info_get_size (finfo);
+            }
+
+          /* The basenames array is created lazily on the first name. */
+          if (!csizenames->basenames)
+            csizenames->basenames = g_ptr_array_new_with_free_func (g_free);
+          g_ptr_array_add (csizenames->basenames, g_strdup (name));
+        }
+      else if (iterres == OSTREE_REPO_COMMIT_ITER_RESULT_DIR)
+        {
+          char *name;
+          char *content_checksum;
+          char *meta_checksum;
+          gs_unref_variant GVariant *dirtree = NULL;
+          ostree_cleanup_repo_commit_traverse_iter
+            OstreeRepoCommitTraverseIter subiter = { 0, };
+
+          ostree_repo_commit_traverse_iter_get_dir (iter, &name, &content_checksum, &meta_checksum);
+          
+          /* Only the directory's content checksum is used: it names
+           * the dirtree object to descend into.
+           */
+          if (!ostree_repo_load_variant (repo, OSTREE_OBJECT_TYPE_DIR_TREE,
+                                         content_checksum, &dirtree,
+                                         error))
+            goto out;
+
+          if (!ostree_repo_commit_traverse_iter_init_dirtree (&subiter, repo, dirtree,
+                                                              OSTREE_REPO_COMMIT_TRAVERSE_FLAG_NONE,
+                                                              error))
+            goto out;
+
+          if (!build_content_sizenames_recurse (repo, &subiter,
+                                                sizenames_map, include_only_objects,
+                                                cancellable, error))
+            goto out;
+        }
+      else
+        g_assert_not_reached ();
+    }
+  ret = TRUE;
+ out:
+  return ret;
+}
+
+/*
+ * compare_sizenames:
+ * @a: pointer to an array slot holding an OstreeDeltaContentSizeNames*
+ * @b: pointer to an array slot holding an OstreeDeltaContentSizeNames*
+ *
+ * Sort comparator ordering entries by ascending size.
+ *
+ * Returns: a negative value, zero, or a positive value if the size of
+ * @a is respectively less than, equal to, or greater than that of @b.
+ */
+static int
+compare_sizenames (const void  *a,
+                   const void  *b)
+{
+  OstreeDeltaContentSizeNames *sn_a = *(OstreeDeltaContentSizeNames**)(void*)a;
+  OstreeDeltaContentSizeNames *sn_b = *(OstreeDeltaContentSizeNames**)(void*)b;
+
+  /* Sizes are guint64; returning their difference would truncate it
+   * to int and yield the wrong sign (or a bogus 0) for large files,
+   * so compare explicitly instead of subtracting.
+   */
+  if (sn_a->size < sn_b->size)
+    return -1;
+  if (sn_a->size > sn_b->size)
+    return 1;
+  return 0;
+}
+
+/**
+ * Produce, for the regular file content of @commit, an array of
+ * OstreeDeltaContentSizeNames (checksum: str, size: uint64,
+ * names: array[string]) sorted with compare_sizenames().
+ *
+ * If @include_only_objects is non-NULL it is handed to the traversal
+ * as a filter on checksums.  On success the array is transferred to
+ * @out_sizenames and TRUE is returned; on failure FALSE is returned.
+ */
+static gboolean
+build_content_sizenames_filtered (OstreeRepo              *repo,
+                                  GVariant                *commit,
+                                  GHashTable              *include_only_objects,
+                                  GPtrArray              **out_sizenames,
+                                  GCancellable            *cancellable,
+                                  GError                 **error)
+{
+  GHashTableIter map_iter;
+  gpointer entry_key;
+  gpointer entry;
+  gs_unref_ptrarray GPtrArray *sorted_entries =
+    g_ptr_array_new_with_free_func (_ostree_delta_content_sizenames_free);
+  gs_unref_hashtable GHashTable *entries_by_checksum =
+    g_hash_table_new_full (g_str_hash, g_str_equal, NULL, _ostree_delta_content_sizenames_free);
+  ostree_cleanup_repo_commit_traverse_iter
+    OstreeRepoCommitTraverseIter commit_iter = { 0, };
+
+  if (!ostree_repo_commit_traverse_iter_init_commit (&commit_iter, repo, commit,
+                                                     OSTREE_REPO_COMMIT_TRAVERSE_FLAG_NONE,
+                                                     error))
+    return FALSE;
+
+  if (!build_content_sizenames_recurse (repo, &commit_iter, entries_by_checksum,
+                                        include_only_objects,
+                                        cancellable, error))
+    return FALSE;
+
+  /* Move every entry out of the map and into the array; stealing
+   * keeps the map's destroy notify from freeing what the array now
+   * owns.
+   */
+  g_hash_table_iter_init (&map_iter, entries_by_checksum);
+  while (g_hash_table_iter_next (&map_iter, &entry_key, &entry))
+    {
+      g_hash_table_iter_steal (&map_iter);
+      g_ptr_array_add (sorted_entries, entry);
+    }
+
+  g_ptr_array_sort (sorted_entries, compare_sizenames);
+
+  gs_transfer_out_value (out_sizenames, &sorted_entries);
+  return TRUE;
+}
+
+/*
+ * Returns TRUE if some string stored in @a compares equal (strcmp)
+ * to some string stored in @b; FALSE otherwise, which includes the
+ * case where either array is empty.
+ */
+static gboolean
+string_array_nonempty_intersection (GPtrArray    *a,
+                                    GPtrArray    *b)
+{
+  guint a_idx;
+
+  for (a_idx = 0; a_idx < a->len; a_idx++)
+    {
+      const char *candidate = a->pdata[a_idx];
+      guint b_idx = 0;
+
+      while (b_idx < b->len)
+        {
+          const char *other = b->pdata[b_idx];
+
+          if (strcmp (candidate, other) == 0)
+            return TRUE;
+          b_idx++;
+        }
+    }
+
+  return FALSE;
+}
+
+/*
+ * Build up a map of files with matching basenames and similar size,
+ * and use it to find apparently similar objects.
+ *
+ * @repo: repository the commits are read from
+ * @from_commit: commit variant of the delta source
+ * @to_commit: commit variant of the delta target
+ * @new_reachable_regfile_content is a Set<checksum> of new regular
+ * file objects.
+ * @similarity_percent_threshold: a source object is a candidate only
+ * if its size lies within plus or minus this percentage of the target
+ * object's size
+ * @out_modified_regfile_content: receives the resulting map on success
+ * @cancellable, @error: passed through to the traversal helpers
+ *
+ * Currently, @out_modified_regfile_content will be a Map<to checksum,from checksum>;
+ * however in the future it would be easy to have this function return
+ * multiple candidate matches.  The hard part would be changing
+ * the delta compiler to iterate over all matches, determine
+ * a cost for each one, then pick the best.
+ *
+ * Returns: TRUE on success, FALSE if traversing either commit failed.
+ */
+gboolean
+_ostree_delta_compute_similar_objects (OstreeRepo                 *repo,
+                                       GVariant                   *from_commit,
+                                       GVariant                   *to_commit,
+                                       GHashTable                 *new_reachable_regfile_content,
+                                       guint                       similarity_percent_threshold,
+                                       GHashTable                **out_modified_regfile_content,
+                                       GCancellable               *cancellable,
+                                       GError                    **error)
+{
+  gboolean ret = FALSE;
+  /* Keys and values are both g_strdup()'d checksum strings, so both
+   * must be released with g_free.
+   */
+  gs_unref_hashtable GHashTable *ret_modified_regfile_content =
+    g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
+  gs_unref_ptrarray GPtrArray *from_sizes = NULL;
+  gs_unref_ptrarray GPtrArray *to_sizes = NULL;
+  /* The fraction is loop-invariant, so compute it once. */
+  const double threshold_fraction = similarity_percent_threshold / 100.0;
+  guint i, j;
+  guint lower;
+  guint upper;
+
+  if (!build_content_sizenames_filtered (repo, from_commit, NULL,
+                                         &from_sizes,
+                                         cancellable, error))
+    goto out;
+
+  if (!build_content_sizenames_filtered (repo, to_commit, new_reachable_regfile_content,
+                                         &to_sizes,
+                                         cancellable, error))
+    goto out;
+
+  /* Iterate over all newly added objects, find objects which have
+   * similar basename and sizes.
+   *
+   * Because the arrays are sorted by size, we can maintain a `lower`
+   * bound on the original (from) objects to start searching.
+   */
+  lower = 0;
+  upper = from_sizes->len;
+  for (i = 0; i < to_sizes->len; i++)
+    {
+      OstreeDeltaContentSizeNames *to_sizenames = to_sizes->pdata[i];
+      /* A threshold above 100% would make the lower factor negative,
+       * and converting a negative double to guint64 is undefined;
+       * treat that case as "no lower bound".
+       */
+      const guint64 min_threshold = (threshold_fraction >= 1.0) ? 0 :
+        to_sizenames->size * (1.0-threshold_fraction);
+      const guint64 max_threshold = to_sizenames->size *
+        (1.0+threshold_fraction);
+
+      /* Don't build candidates for the empty object */
+      if (to_sizenames->size == 0)
+        continue;
+
+      for (j = lower; j < upper; j++)
+        {
+          OstreeDeltaContentSizeNames *from_sizenames = from_sizes->pdata[j];
+
+          /* Don't build candidates for the empty object */
+          if (from_sizenames->size == 0)
+            continue;
+
+          /* Too small for this target, and therefore for every later
+           * (larger or equal) target as well: advance the lower bound.
+           */
+          if (from_sizenames->size < min_threshold)
+            {
+              lower++;
+              continue;
+            }
+
+          /* Everything from here on is larger still; stop searching. */
+          if (from_sizenames->size > max_threshold)
+            break;
+
+          if (!string_array_nonempty_intersection (from_sizenames->basenames, to_sizenames->basenames))
+            continue;
+
+          /* Only one candidate right now */
+          g_hash_table_insert (ret_modified_regfile_content,
+                               g_strdup (to_sizenames->checksum),
+                               g_strdup (from_sizenames->checksum));
+          break;
+        }
+    }
+
+  ret = TRUE;
+  gs_transfer_out_value (out_modified_regfile_content, &ret_modified_regfile_content);
+ out:
+  return ret;
+}
+
index 62d006ef888ac90094ee7ca8e0a71810bc2b1b00..b3ce797c7c41b99ed5b4fd32e0081ff8a75a8020 100644 (file)
@@ -32,6 +32,8 @@
 #include "otutil.h"
 #include "ostree-varint.h"
 
+#define CONTENT_SIZE_SIMILARITY_THRESHOLD_PERCENT (30)
+
 typedef struct {
   guint64 uncompressed_size;
   GPtrArray *objects;
@@ -479,7 +481,7 @@ try_content_rollsum (OstreeRepo                       *repo,
   gs_unref_bytes GBytes *tmp_to = NULL;
   gs_unref_object GFileInfo *from_finfo = NULL;
   gs_unref_object GFileInfo *to_finfo = NULL;
-  OstreeRollsumMatches *matches;
+  OstreeRollsumMatches *matches = NULL;
   ContentRollsum *ret_rollsum = NULL;
 
   *out_rollsum = NULL;
@@ -669,7 +671,6 @@ process_one_rollsum (OstreeRepo                       *repo,
   return ret;
 }
 
-
 static gboolean 
 generate_delta_lowlatency (OstreeRepo                       *repo,
                            const char                       *from,
@@ -681,18 +682,18 @@ generate_delta_lowlatency (OstreeRepo                       *repo,
   gboolean ret = FALSE;
   GHashTableIter hashiter;
   gpointer key, value;
-  guint i;
   OstreeStaticDeltaPartBuilder *current_part = NULL;
   gs_unref_object GFile *root_from = NULL;
+  gs_unref_variant GVariant *from_commit = NULL;
   gs_unref_object GFile *root_to = NULL;
-  gs_unref_ptrarray GPtrArray *modified = NULL;
-  gs_unref_ptrarray GPtrArray *removed = NULL;
-  gs_unref_ptrarray GPtrArray *added = NULL;
+  gs_unref_variant GVariant *to_commit = NULL;
   gs_unref_hashtable GHashTable *to_reachable_objects = NULL;
   gs_unref_hashtable GHashTable *from_reachable_objects = NULL;
+  gs_unref_hashtable GHashTable *from_regfile_content = NULL;
   gs_unref_hashtable GHashTable *new_reachable_metadata = NULL;
-  gs_unref_hashtable GHashTable *new_reachable_content = NULL;
-  gs_unref_hashtable GHashTable *modified_content_objects = NULL;
+  gs_unref_hashtable GHashTable *new_reachable_regfile_content = NULL;
+  gs_unref_hashtable GHashTable *new_reachable_symlink_content = NULL;
+  gs_unref_hashtable GHashTable *modified_regfile_content = NULL;
   gs_unref_hashtable GHashTable *rollsum_optimized_content_objects = NULL;
   gs_unref_hashtable GHashTable *content_object_to_size = NULL;
 
@@ -701,51 +702,30 @@ generate_delta_lowlatency (OstreeRepo                       *repo,
       if (!ostree_repo_read_commit (repo, from, &root_from, NULL,
                                     cancellable, error))
         goto out;
-    }
-  if (!ostree_repo_read_commit (repo, to, &root_to, NULL,
-                                cancellable, error))
-    goto out;
-
-  /* Gather a filesystem level diff; when we do heuristics to ship
-   * just parts of changed files, we can make use of this data.
-   */
-  modified = g_ptr_array_new_with_free_func ((GDestroyNotify) ostree_diff_item_unref);
-  removed = g_ptr_array_new_with_free_func ((GDestroyNotify) g_object_unref);
-  added = g_ptr_array_new_with_free_func ((GDestroyNotify) g_object_unref);
-  if (!ostree_diff_dirs (OSTREE_DIFF_FLAGS_NONE, root_from, root_to, modified, removed, added,
-                         cancellable, error))
-    goto out;
 
-  modified_content_objects = g_hash_table_new_full (g_str_hash, g_str_equal,
-                                                    g_free, g_free);
-  for (i = 0; i < modified->len; i++)
-    {
-      OstreeDiffItem *diffitem = modified->pdata[i];
-      /* Theoretically, a target file could replace multiple source
-       * files.  That could happen if say a project changed from having
-       * multiple binaries to one binary.
-       *
-       * In that case, we have last one wins behavior.  For ELF rollsum
-       * tends to be useless unless there's a large static data blob.
-       */
-      g_hash_table_replace (modified_content_objects,
-                            g_strdup (diffitem->target_checksum),
-                            g_strdup (diffitem->src_checksum));
-    }
+      if (!ostree_repo_load_variant (repo, OSTREE_OBJECT_TYPE_COMMIT, from,
+                                     &from_commit, error))
+        goto out;
 
-  if (from)
-    {
       if (!ostree_repo_traverse_commit (repo, from, 0, &from_reachable_objects,
                                         cancellable, error))
         goto out;
     }
 
+  if (!ostree_repo_read_commit (repo, to, &root_to, NULL,
+                                cancellable, error))
+    goto out;
+  if (!ostree_repo_load_variant (repo, OSTREE_OBJECT_TYPE_COMMIT, to,
+                                 &to_commit, error))
+    goto out;
+
   if (!ostree_repo_traverse_commit (repo, to, 0, &to_reachable_objects,
                                     cancellable, error))
     goto out;
 
   new_reachable_metadata = ostree_repo_traverse_new_reachable ();
-  new_reachable_content = ostree_repo_traverse_new_reachable ();
+  new_reachable_regfile_content = g_hash_table_new_full (g_str_hash, g_str_equal, NULL, g_free);
+  new_reachable_symlink_content = g_hash_table_new_full (g_str_hash, g_str_equal, NULL, g_free);
 
   g_hash_table_iter_init (&hashiter, to_reachable_objects);
   while (g_hash_table_iter_next (&hashiter, &key, &value))
@@ -763,14 +743,41 @@ generate_delta_lowlatency (OstreeRepo                       *repo,
       if (OSTREE_OBJECT_TYPE_IS_META (objtype))
         g_hash_table_add (new_reachable_metadata, serialized_key);
       else
-        g_hash_table_add (new_reachable_content, serialized_key);
+        {
+          gs_unref_object GFileInfo *finfo = NULL;
+          GFileType ftype;
+
+          if (!ostree_repo_load_file (repo, checksum, NULL, &finfo, NULL,
+                                      cancellable, error))
+            goto out;
+
+          ftype = g_file_info_get_file_type (finfo);
+          if (ftype == G_FILE_TYPE_REGULAR)
+            g_hash_table_add (new_reachable_regfile_content, g_strdup (checksum));
+          else if (ftype == G_FILE_TYPE_SYMBOLIC_LINK)
+            g_hash_table_add (new_reachable_symlink_content, g_strdup (checksum));
+          else
+            g_assert_not_reached ();
+        }
     }
-  
-  g_printerr ("modified: %u removed: %u added: %u\n",
-              modified->len, removed->len, added->len);
-  g_printerr ("new reachable: metadata=%u content=%u\n",
+
+  if (from_commit)
+    {
+      if (!_ostree_delta_compute_similar_objects (repo, from_commit, to_commit,
+                                                  new_reachable_regfile_content,
+                                                  CONTENT_SIZE_SIMILARITY_THRESHOLD_PERCENT,
+                                                  &modified_regfile_content,
+                                                  cancellable, error))
+        goto out;
+    }
+  else
+    modified_regfile_content = g_hash_table_new (g_str_hash, g_str_equal);
+
+  g_printerr ("modified: %u\n", g_hash_table_size (modified_regfile_content));
+  g_printerr ("new reachable: metadata=%u content regular=%u symlink=%u\n",
               g_hash_table_size (new_reachable_metadata),
-              g_hash_table_size (new_reachable_content));
+              g_hash_table_size (new_reachable_regfile_content),
+              g_hash_table_size (new_reachable_symlink_content));
 
   /* We already ship the to commit in the superblock, don't ship it twice */
   g_hash_table_remove (new_reachable_metadata,
@@ -780,7 +787,7 @@ generate_delta_lowlatency (OstreeRepo                       *repo,
                                                              g_free,
                                                              (GDestroyNotify) content_rollsums_free);
 
-  g_hash_table_iter_init (&hashiter, modified_content_objects);
+  g_hash_table_iter_init (&hashiter, modified_regfile_content);
   while (g_hash_table_iter_next (&hashiter, &key, &value))
     {
       const char *to_checksum = key;
@@ -800,7 +807,7 @@ generate_delta_lowlatency (OstreeRepo                       *repo,
 
   g_printerr ("rollsum for %u/%u modified\n",
               g_hash_table_size (rollsum_optimized_content_objects),
-              g_hash_table_size (modified_content_objects));
+              g_hash_table_size (modified_regfile_content));
 
   current_part = allocate_part (builder);
 
@@ -837,22 +844,18 @@ generate_delta_lowlatency (OstreeRepo                       *repo,
   /* Scan for large objects, so we can fall back to plain HTTP-based
    * fetch.
    */
-  g_hash_table_iter_init (&hashiter, new_reachable_content);
+  g_hash_table_iter_init (&hashiter, new_reachable_regfile_content);
   while (g_hash_table_iter_next (&hashiter, &key, &value))
     {
-      GVariant *serialized_key = key;
-      const char *checksum;
-      OstreeObjectType objtype;
+      const char *checksum = key;
       guint64 uncompressed_size;
       gboolean fallback = FALSE;
 
-      ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
-
       /* Skip content objects we rollsum'd */
       if (g_hash_table_contains (rollsum_optimized_content_objects, checksum))
         continue;
 
-      if (!ostree_repo_load_object_stream (repo, objtype, checksum,
+      if (!ostree_repo_load_object_stream (repo, OSTREE_OBJECT_TYPE_FILE, checksum,
                                            NULL, &uncompressed_size,
                                            cancellable, error))
         goto out;
@@ -862,30 +865,37 @@ generate_delta_lowlatency (OstreeRepo                       *repo,
       if (fallback)
         {
           gs_free char *size = g_format_size (uncompressed_size);
-          g_printerr ("fallback for %s (%s)\n",
-                      ostree_object_to_string (checksum, objtype), size);
+          g_printerr ("fallback for %s (%s)\n", checksum, size);
           g_ptr_array_add (builder->fallback_objects, 
-                           g_variant_ref (serialized_key));
+                           ostree_object_name_serialize (checksum, OSTREE_OBJECT_TYPE_FILE));
           g_hash_table_iter_remove (&hashiter);
         }
     }
 
-  /* Now non-rollsummed content */
-  g_hash_table_iter_init (&hashiter, new_reachable_content);
+  /* Now non-rollsummed regular file content */
+  g_hash_table_iter_init (&hashiter, new_reachable_regfile_content);
   while (g_hash_table_iter_next (&hashiter, &key, &value))
     {
-      GVariant *serialized_key = key;
-      const char *checksum;
-      OstreeObjectType objtype;
-
-      ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
+      const char *checksum = key;
 
       /* Skip content objects we rollsum'd */
       if (g_hash_table_contains (rollsum_optimized_content_objects, checksum))
         continue;
 
       if (!process_one_object (repo, builder, &current_part,
-                               checksum, objtype,
+                               checksum, OSTREE_OBJECT_TYPE_FILE,
+                               cancellable, error))
+        goto out;
+    }
+
+  /* Now symlinks */
+  g_hash_table_iter_init (&hashiter, new_reachable_symlink_content);
+  while (g_hash_table_iter_next (&hashiter, &key, &value))
+    {
+      const char *checksum = key;
+
+      if (!process_one_object (repo, builder, &current_part,
+                               checksum, OSTREE_OBJECT_TYPE_FILE,
                                cancellable, error))
         goto out;
     }
index 2478f1672eb25c95e0fa2e90a31eedd01ae1e95d..55ef437f2cfd86647bfcda1be923d8ec4ac8b3db 100644 (file)
@@ -152,4 +152,23 @@ _ostree_repo_static_delta_part_have_all_objects (OstreeRepo             *repo,
                                                  gboolean               *out_have_all,
                                                  GCancellable           *cancellable,
                                                  GError                **error);
+
+/* One content object together with the data used to look for
+ * similar objects: its size and every basename it was found under.
+ */
+typedef struct {
+  char *checksum;       /* content checksum; freed with g_free */
+  guint64 size;         /* size as reported by the object's GFileInfo */
+  GPtrArray *basenames; /* array of strings; NULL until the first name is added */
+} OstreeDeltaContentSizeNames;
+
+/* Frees an OstreeDeltaContentSizeNames and the data it owns; takes a
+ * gpointer so it can be used as a destroy notify.
+ */
+void _ostree_delta_content_sizenames_free (gpointer v);
+
+/* Finds, for the new regular file objects of @to_commit, apparently
+ * similar objects in @from_commit (shared basename, size within
+ * @similarity_percent_threshold percent).  On success returns TRUE
+ * and stores a map of to-checksum -> from-checksum in
+ * @out_modified_regfile_content.
+ */
+gboolean
+_ostree_delta_compute_similar_objects (OstreeRepo                 *repo,
+                                       GVariant                   *from_commit,
+                                       GVariant                   *to_commit,
+                                       GHashTable                 *new_reachable_regfile_content,
+                                       guint                       similarity_percent_threshold,
+                                       GHashTable                **out_modified_regfile_content,
+                                       GCancellable               *cancellable,
+                                       GError                    **error);
+
 G_END_DECLS